#!/usr/bin/env python
# -*- coding: utf-8 -*-

from numpy import unique
from numpy import where
from sklearn.datasets import make_classification
from sklearn.cluster import KMeans
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder
from matplotlib import pyplot
import numpy as np


class KMeansTest:
    """Small scikit-learn demos: K-Means clustering on iris and one-hot encoding.

    The ``test*`` methods take no arguments and return nothing; they print
    their results, and ``test2`` additionally shows a matplotlib figure.
    """

    # Fixed seed handed to KMeans so that centroid initialisation -- and with
    # it the (arbitrary) numbering of the clusters -- is the same on every run.
    _RANDOM_STATE = 0

    def __init__(self) -> None:
        super().__init__()

    def _cluster_iris(self):
        """Fit a 3-cluster K-Means model on the iris data.

        Returns:
            A ``(features, target, labels)`` tuple: the iris feature matrix,
            the class labels shipped with the dataset, and the cluster index
            the fitted model predicts for each sample.
        """
        iris = datasets.load_iris()
        features = iris.data
        model = KMeans(n_clusters=3, random_state=self._RANDOM_STATE)
        model.fit(features)
        return features, iris.target, model.predict(features)

    def test1(self) -> None:
        """Cluster iris and print the cluster ids followed by the true labels."""
        _, target, labels = self._cluster_iris()
        # NOTE: cluster ids are arbitrary, so cluster 0 need not line up with
        # class 0 -- compare how the samples are grouped, not the raw numbers.
        print("预测值", labels)
        print("真实值", target)
        print()

    def test2(self) -> None:
        """Cluster iris, scatter-plot each cluster, then print ids and labels.

        Only the first two feature columns are plotted; each cluster gets its
        own ``scatter`` call and therefore its own colour.
        """
        # Alternative synthetic dataset that was used here previously:
        # X, _ = make_classification(n_samples=1000, n_features=2, n_informative=2, n_redundant=0,
        #                            n_clusters_per_class=1, random_state=4)
        features, target, labels = self._cluster_iris()
        # One scatter series per distinct cluster id.
        for cluster in unique(labels):
            # Boolean mask selecting the rows assigned to this cluster.
            members = labels == cluster
            pyplot.scatter(features[members, 0], features[members, 1])
        # Display the finished figure.
        pyplot.show()
        print("预测值", labels)
        print("真实值", target)

    def test3(self) -> None:
        """Print the dense one-hot encoding of a small table of city-name pairs."""
        rows = [['天津', '涿州'], ['北京', '广州'], ['上海', '永济'], ['南京', '赤峰'], ['北京', '广州']]
        encoder = OneHotEncoder()
        # The encoded result is converted with toarray() so the full matrix is printed.
        print(encoder.fit_transform(rows).toarray())


if __name__ == "__main__":
    # Script entry point: only the one-hot encoding demo is enabled; uncomment
    # the other calls to run the K-Means demos as well.
    demo = KMeansTest()
    # demo.test1()
    # demo.test2()
    demo.test3()
